# REPLICATION
# Place-Based Campaigning: The Political Impact of Real Grassroots Mobilization
# Daniel Bischof and Thomas Kurer
# Journal of Politics

# Data Collection: Spatial Aggregation of Event-Level Data to Municipality

# Attach the packages this script relies on. The pipe (`%>%`) used below is
# provided by dplyr, and the unqualified st_* geometry helpers come from sf.
# Without these calls the script only runs in a session where both packages
# already happen to be attached.
library(dplyr)
library(sf)

# read and transform shape file

# Municipality polygons (the file name suggests 2016 boundaries in ED50).
shape <- sf::st_read(
  "./../../data_original/Shapefile/Limiti_2016_ED50_g/Com2016_ED50_g/Com2016_ED50_g.shp",
  stringsAsFactors = FALSE
)
# transform coordinates to lat/lon
shape <- sf::st_transform(shape, "+proj=longlat +ellps=WGS84 +datum=WGS84")
# sanity checks: object class, structure and the resulting CRS
class(shape)
str(shape)
sf::st_crs(shape)

# export area for population density

# Coerce the sf object to a plain data frame and pick the three attributes by
# name; this leaves the geometry column behind. PRO_COM is the comune id used
# for matching later on, SHAPE_Area is presumably the polygon area shipped
# with the shapefile (it is not recomputed here).
area <- as.data.frame(shape)[, c("PRO_COM", "COMUNE", "SHAPE_Area")]

write.csv(area, "./../../data_original/comune info/area.csv")

# read meetup events data

meetup <- read.csv("./../../data_original/M5S/0_events_20181206.csv")

# One row per meetup group together with its coordinates: within each
# group_name, the row ranked first by group_lon is retained.
groups <- meetup %>%
  dplyr::select(group_name, group_lon, group_lat) %>%
  dplyr::group_by(group_name) %>%
  dplyr::filter(dplyr::row_number(group_lon) == 1) %>%
  dplyr::ungroup()

# geocodes flawed for one group in sardegna: overwrite them by hand
san_teodoro <- groups$group_name == "Amici di Beppe Grillo San Teodoro"
groups$group_lon[san_teodoro] <- 9.67
groups$group_lat[san_teodoro] <- 40.77

# Coordinate-only table (lon first, then lat) used to build point geometries.
points <- groups %>% dplyr::select(lon = group_lon, lat = group_lat)

# create a points collection
# One POINT geometry per row of `points` (lon, lat), tagged as EPSG:4326.
# seq_len() keeps this well-defined when `points` has zero rows, where
# 1:nrow(points) would iterate over c(1, 0).
pnts_sf <- sf::st_sfc(
  lapply(
    seq_len(nrow(points)),
    function(i) sf::st_point(as.numeric(points[i, ]))
  ),
  crs = 4326
)
class(pnts_sf)
str(pnts_sf)
sf::st_crs(pnts_sf)

# Project points and polygons to the same planar CRS before the geometric
# operations below.
# NOTE(review): EPSG:2163 is the US National Atlas Equal Area projection,
# which is centred on the United States. It is kept here so that the
# replication output does not change, but a European CRS (e.g. EPSG:3035 or
# EPSG:32632) would be the natural choice for Italian data -- confirm before
# reusing this code for distance-based matching.
pnts_trans <- sf::st_transform(pnts_sf, 2163)  # apply transformation to pnts sf
shape_trans <- sf::st_transform(shape, 2163)   # apply transformation to polygons sf

# intersect and extract comune id

# Sparse point-in-polygon lookup: for every group location, the row indices of
# the municipality polygons it intersects. The sparse form avoids building a
# dense polygons x points logical matrix.
hits <- sf::st_intersects(pnts_trans, shape_trans)

# A point lying exactly on a shared boundary can intersect several polygons.
# The first match is used in that case, and a warning makes this visible.
n_hits <- lengths(hits)
if (any(n_hits > 1L)) {
  warning(
    sum(n_hits > 1L),
    " point(s) intersect more than one municipality; using the first match",
    call. = FALSE
  )
}

# Index of the matching polygon per point, NA where nothing intersects
# (e.g. groups located abroad or just off the coast).
hit_idx <- vapply(
  hits,
  function(h) if (length(h) == 0L) NA_integer_ else h[1L],
  integer(1)
)

# Indexing with NA yields NA, so unmatched points get a missing comune id
# regardless of whether PRO_COM is stored as integer, double or character.
points$comune <- shape_trans$PRO_COM[hit_idx]

# Columns: group_name, group_lon, group_lat, lon, lat, comune
groups_comune <- cbind(groups, points)

# fix NA cases with nearest neighbor ----

# Groups whose location did not fall inside any municipality polygon are
# candidates for nearest-neighbor matching, but only if they plausibly lie in
# Italy. Reference extremes of the Italian territory:
#
# Northernmost point: Westliches Zwillingsköpfl, Predoi, Alto Adige at 47°5′N 12°11′E
# Southernmost point on the mainland: Capo Spartivento, Calabria at 37°56′N 16°3′E; on Lampedusa, Sicily: Punta Pesce Spada, at 35°29′N 12°36′E
# Westernmost point: Rocca Bernauda, Bardonecchia, Piedmont at 45°6′N 6°37′E
# Easternmost point: Capo d'Otranto, Otranto, Apulia at 40°6′N 18°31′E
#
# NOTE(review): the bounding box applied below is tighter than these extremes
# in the south (37.5 vs. 35°29′N), the east (18.3 vs. 18°31′E) and the north
# (47 vs. 47°5′N) -- presumably to keep nearby foreign groups out; confirm
# that no unmatched Italian groups are lost this way.
#
# Conditions are combined with AND: unmatched, inside the box (drop points
# outside of italy), and not one of the groups named after Switzerland/Valais.
case_NA <- groups_comune %>%
  dplyr::filter(
    is.na(comune),
    lat >= 37.5,
    lat <= 47,
    lon >= 6.3,
    lon <= 18.3,
    !grepl("Svizzera", group_name),
    !grepl("Valais", group_name)
  )

# Coordinates of the remaining unmatched groups (lon first, then lat).
NA_points <- case_NA[, c("lon", "lat")]


# create a points collection
# One POINT geometry per unmatched group, tagged as EPSG:4326. seq_len()
# keeps this well-defined when there are no unmatched groups, where
# 1:nrow(NA_points) would iterate over c(1, 0).
NA_points_sf <- sf::st_sfc(
  lapply(
    seq_len(nrow(NA_points)),
    function(i) sf::st_point(as.numeric(NA_points[i, ]))
  ),
  crs = 4326
)
class(NA_points_sf)
str(NA_points_sf)
sf::st_crs(NA_points_sf)

# convert to planar

# Same target CRS as the one used for the projected polygons (shape_trans).
NA_points_trans <- sf::st_transform(NA_points_sf, 2163) # apply transformation to pnts sf

NA_shape_trans <- shape_trans

# Distances are only meaningful if both layers share a CRS, so stop here
# instead of merely printing the comparison.
stopifnot(sf::st_crs(NA_shape_trans) == sf::st_crs(NA_points_trans))

# find nearest neighbor and extract comune id

# For each unmatched point, the row index of the municipality polygon at the
# smallest distance (the first one on ties, as returned by which.min).
# vapply() returns an empty index when there are no unmatched points, so the
# steps below also work in that case.
nearest_idx <- vapply(
  seq_len(nrow(NA_points)),
  function(i) {
    which.min(sf::st_distance(NA_shape_trans, NA_points_trans[i]))
  },
  integer(1)
)

# Nearest polygons reduced to the comune id. The column is selected by name
# rather than by position, so a different column order in the shapefile
# cannot silently pick the wrong attribute.
closest_comune_NA <- NA_shape_trans[nearest_idx, "PRO_COM"]

# Fill in the (so far missing) comune id of the unmatched groups and keep the
# same column layout as groups_comune so both can be row-bound afterwards.
case_NA$comune <- closest_comune_NA$PRO_COM
case_NA <- case_NA[
  ,
  c("group_name", "group_lon", "group_lat", "lon", "lat", "comune")
]

# combine files

# Groups matched directly by the polygon intersection (unmatched rows, which
# include the groups abroad, are left out here) ...
groups_match <- dplyr::filter(groups_comune, !is.na(comune))
# ... and groups matched through the nearest-neighbor step.
groups_NA <- case_NA

# Stack both sets and keep the group identifier, its original coordinates and
# the assigned comune id (columns 1-3 and 6 of the combined table).
groups_comune <- rbind(groups_match, groups_NA) %>%
  dplyr::select(group_name, group_lon, group_lat, comune)

# add comune information

# Lookup table built from the first five attribute columns of the shapefile;
# the geometry column that sf carries along is removed afterwards.
name <- as.data.frame(shape[, 1:5])
name$geometry <- NULL

# Left join: every group keeps its row, administrative attributes are attached
# through the comune id (PRO_COM on the shapefile side).
groups_comune_full <- merge(
  groups_comune,
  name,
  by.x = "comune",
  by.y = "PRO_COM",
  all.x = TRUE
)

# merge with events-level file

# Rename the administrative identifiers and fix the column order in one step.
groups_comune_merge <- groups_comune_full %>%
  dplyr::select(
    region_id = COD_REG,
    terunit_id = COD_CM,
    province_id = COD_PRO,
    comune_id = comune,
    comune_name = COMUNE,
    group_name
  )

# Inner join on the group name: events whose group has no assigned comune do
# not appear in the result.
events_groups_comune <- merge(meetup, groups_comune_merge, by = "group_name")

# Groups present in the raw events file but absent after the merge.
setdiff(meetup$group_name, events_groups_comune$group_name)
# all non-italian groups are dropped, -31 groups

# Number of events lost through the merge.
nrow(meetup) - nrow(events_groups_comune)
# all non-italian groups are dropped, -2630 events

write.csv(events_groups_comune, "./../../data_coded/events_groups_comune.csv")




